#################################################################################################
#This file conducts the analysis on the litigation and credit claiming paper.
#################################################################################################
#set working directory
# NOTE(review): this is a user-specific Dropbox location. The relative paths
# used further down ("./data/...", "./Figures/...", "Tables/...") are resolved
# against it, so it will need adjusting on any other machine.
setwd("~/Dropbox/LobbyingInsideOut/LitigationPaper/JLC replication")

#clear working environment
# Removes every non-hidden object from the current environment before the
# analysis starts; it does not detach packages or reset options.
rm(list=ls()) 

#load packages  
library(tidyverse)
library(ggplot2)
library(readxl)
library(data.table)
library(stargazer)
library(xtable)

##################################################################
# Read in data
##################################################################
# Federally focused organizations.
# Steps: flag every row with federal = 1, recode the string "na" in the
# Facebook handle to NA and drop those rows, de-duplicate, then normalise the
# Twitter handle and build a lower-cased Organization name.
federal_orgs <- read.csv("./data/federally_focused_orgs.csv") %>%
  dplyr::mutate(
    federal = 1,
    facebook.handle.final = ifelse(facebook.handle.final == "na", NA, facebook.handle.final)
  ) %>%
  dplyr::filter(!is.na(facebook.handle.final)) %>%
  dplyr::distinct() %>%
  dplyr::mutate(
    # lower-case, strip the first "@", then trim surrounding whitespace
    twitter.handle.final = str_trim(str_replace(tolower(twitter.handle.final), "\\@", "")),
    # replace "&" with "and" before lower-casing the organization name
    Organization = tolower(gsub("&", "and", organizationNames)),
    # Empty handles are recoded to NA here, i.e. after the NA filter above,
    # so these rows remain in federal_orgs with a missing Facebook handle.
    facebook.handle.final = ifelse(facebook.handle.final == "", NA, facebook.handle.final)
  )

# Amicus organizations.
# Keeps rows with use == 1, flags them with amicus = 1, normalises the Twitter
# handle (taken from twitter.handle) and the organization name, recodes empty
# Facebook handles to NA, de-duplicates, and drops rows without a Facebook
# handle.
amicus_orgs <- fread("./data/AmiciOrgsClean.csv") %>%
  dplyr::filter(use == 1) %>%
  dplyr::mutate(
    amicus = 1,
    # lower-case, strip the first "@", then trim surrounding whitespace
    twitter.handle.final = str_trim(str_replace(tolower(twitter.handle), "\\@", "")),
    # replace "&" with "and" before lower-casing the organization name
    Organization = tolower(gsub("&", "and", organizationNames)),
    facebook.handle.final = ifelse(facebook.handle.final == "", NA, facebook.handle.final)
  ) %>%
  dplyr::distinct() %>%
  dplyr::filter(!is.na(facebook.handle.final))

# Combined organization list: full join of the federal and amicus lists on the
# Facebook handle, Twitter handle, Type and LegalOrg.
#   og_only = 1 for rows flagged federal that found no amicus match, else 0
#   amicus  = 0 where the join left it missing
# Within each (Facebook handle, Twitter handle) pair, missing values are filled
# from the neighbouring rows ("updown"), duplicates are removed, and think
# tanks plus the placeholder Facebook handle "0" are dropped. Rows where Type
# or the Facebook handle is NA are dropped by these filters as well.
# Note: the result is still grouped by the two handle columns.
all_orgs <- federal_orgs %>%
  full_join(
    amicus_orgs,
    by = c("facebook.handle.final", "twitter.handle.final", "Type", "LegalOrg")
  ) %>%
  dplyr::mutate(
    og_only = ifelse(federal == 1 & is.na(amicus), 1, 0),
    amicus = ifelse(is.na(amicus), 0, amicus)
  ) %>%
  dplyr::select(
    Type, amicus, og_only, twitter.handle.final,
    LegalOrg, facebook.handle.final, federal
  ) %>%
  dplyr::group_by(facebook.handle.final, twitter.handle.final) %>%
  fill(everything(), .direction = "updown") %>%
  dplyr::distinct() %>%
  dplyr::filter(Type != "Think Tank", facebook.handle.final != "0")

### Read in tweet files
# Each CSV is loaded with data.table's fread(); the object names mirror the
# file names (minus the "_JLC" suffix, and "fuzzyjoin_scotus_full" shortened
# to "fuzzy").
twitter_amicus_orgs_full        <- data.table::fread("./data/twitter_amicus_orgs_full_JLC.csv")
twitter_federal_orgs_full       <- data.table::fread("./data/twitter_federal_orgs_full_JLC.csv")
twitter_fuzzy                   <- data.table::fread("./data/twitter_fuzzyjoin_scotus_full_JLC.csv")
twitter_amicus_orgs_dictionary  <- data.table::fread("./data/twitter_amicus_orgs_dictionary_JLC.csv")
twitter_federal_orgs_dictionary <- data.table::fread("./data/twitter_federal_orgs_dictionary_JLC.csv")

##################################################################
# Tables
##################################################################
#Table 4
# Amicus-related tweeting by organization group.
# The amicus dictionary file (without author_id) is full-joined to the federal
# dictionary file on all columns the two share, and exact duplicate rows are
# removed. Each row is then assigned to one of four groups from LegalOrg
# (1 = legal) and og_only (1 = non-amicus); rows where either flag is missing
# or not 0/1 get Group = NA and are excluded from the table.
# Per group the table reports:
#   Number of Groups     - distinct handles
#   Total Posts          - number of rows
#   Amicus Posts         - sum(amicus_words) minus sum(amicus_to_remove)
#   Percent Amicus Posts - Amicus Posts / Total Posts * 100
summary.table <- twitter_amicus_orgs_dictionary %>%
  dplyr::select(-author_id) %>%
  full_join(twitter_federal_orgs_dictionary) %>%
  dplyr::distinct() %>%
  dplyr::mutate(
    # The four conditions are mutually exclusive; anything else falls through to NA.
    Group = dplyr::case_when(
      LegalOrg == 1 & og_only == 0 ~ "Legal Amicus",
      LegalOrg == 1 & og_only == 1 ~ "Legal Non-Amicus",
      LegalOrg == 0 & og_only == 1 ~ "Non-Legal Non-Amicus",
      LegalOrg == 0 & og_only == 0 ~ "Non-Legal Amicus",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::group_by(Group) %>%
  dplyr::summarise(
    `Number of Groups` = length(unique(handle)),
    `Total Posts` = n(),
    `Amicus Posts` = sum(amicus_words, na.rm = TRUE) - sum(amicus_to_remove, na.rm = TRUE)
  ) %>%
  dplyr::mutate(`Percent Amicus Posts` = (`Amicus Posts` / `Total Posts`) * 100) %>%
  dplyr::filter(!is.na(Group))

# Write the LaTeX table. digits has one entry for the row names plus one per
# column, so only the percentage column is printed with two decimals.
print(
  xtable(
    summary.table,
    caption = "Amicus tweets by group type on Twitter, alongside the percent of posts by group type about amicus terms.",
    label = "summarytableaverages_twit",
    digits = c(0, 0, 0, 0, 0, 2)
  ),
  file = "Tables/Table4.tex",
  include.rownames = FALSE
)

##################################################################
# Figures
##################################################################
#Figure 2: twitterweeklycourtspostsavgbygroup
# Tweets from the federal-orgs file with court_words == 1 and
# courts_to_remove == 0, labelled as courts posts on Twitter.
twittercourts <- twitter_federal_orgs_full %>%
  dplyr::filter(court_words == 1, courts_to_remove == 0) %>%
  dplyr::mutate(Post = "Courts", Platform = "Twitter")

# Monthly number of courts tweets per Type, divided by the number of distinct
# organizations in twittercourts.
# NOTE(review): unique_groups is computed before grouping, so the denominator
# is the count of distinct organizations across ALL types, not within each
# Type. Confirm this is the intended "per group type" average.
twitter.group.court <- twittercourts %>%
  dplyr::mutate(unique_groups = n_distinct(Organization)) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(month = cut(as.Date(created_at), "month"), Type) %>%
  # unique_groups is constant, so take its first value and return exactly one
  # row per (month, Type). The previous form returned one row per tweet and
  # relied on distinct() to collapse them, which dplyr >= 1.1 deprecates.
  # dplyr::first is qualified because data.table masks first().
  # .groups = "keep" leaves the result grouped by month and Type, as before.
  dplyr::summarise(
    postsbygroup = n() / dplyr::first(unique_groups),
    .groups = "keep"
  ) %>%
  dplyr::filter(!is.na(Type), Type != "")

ggplot(twitter.group.court, aes(x = as.Date(month), y = postsbygroup, color = Type, shape = Type)) +
  geom_point(size = 3) +
  scale_colour_grey(start = 0.1, end = 0.7) +
  xlab("Date of Tweet Creation") +
  ylab("Average Monthly Courts Tweets Per Group Type") +
  theme(legend.text = element_text(size = 14)) +
  theme(legend.key = element_rect(fill = "white")) +
  scale_alpha(guide = "none") +
  # `labels` is spelled out in full (it was `label`, which only worked through
  # partial argument matching).
  # NOTE(review): a break at 0 has no finite position on a log10 axis, so it
  # presumably never appears on the plot - confirm.
  scale_y_continuous(
    trans = 'log10',
    breaks = c(0, 0.1, 0.2, 0.3, 0.5),
    labels = c("0", "0.1", "0.2", "0.3", "0.5")
  ) +
  # NOTE(review): theme_bw() is a complete theme, so adding it here appears to
  # discard the legend.text / legend.key settings above. The order is left
  # unchanged so the saved figure stays as it was - confirm whether the larger
  # legend text was intended.
  theme_bw() +
  ggtitle("Twitter") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure2_twitter.pdf", height = 6, width = 8)

#Figure 3: percpostsbylegal_twitter
# For each value of LegalOrg (rows with missing LegalOrg are excluded), the
# share of tweets flagged by each of three indicators. Every component counts
# tweets by LegalOrg and flag, converts the counts to within-LegalOrg shares,
# and keeps the flag == 1 row:
#   "Court"      - court_words, among tweets with courts_to_remove == 0
#   "Amicus"     - amicus_words, among tweets with amicus_to_remove == 0
#   "Case Names" - case_words, among all tweets
plot_legal <- dplyr::bind_rows(
  twitter_federal_orgs_full %>%
    dplyr::filter(courts_to_remove == 0, !is.na(LegalOrg)) %>%
    dplyr::count(LegalOrg, court_words) %>%
    dplyr::group_by(LegalOrg) %>%
    dplyr::mutate(freq = n / sum(n)) %>%
    dplyr::filter(court_words == 1) %>%
    dplyr::mutate(Type = "Court"),
  twitter_federal_orgs_full %>%
    dplyr::filter(!is.na(LegalOrg), amicus_to_remove == 0) %>%
    dplyr::count(LegalOrg, amicus_words) %>%
    dplyr::group_by(LegalOrg) %>%
    dplyr::mutate(freq = n / sum(n)) %>%
    dplyr::filter(amicus_words == 1) %>%
    dplyr::mutate(Type = "Amicus"),
  twitter_federal_orgs_full %>%
    dplyr::filter(!is.na(LegalOrg)) %>%
    dplyr::count(LegalOrg, case_words) %>%
    dplyr::group_by(LegalOrg) %>%
    dplyr::mutate(freq = n / sum(n)) %>%
    dplyr::filter(case_words == 1) %>%
    dplyr::mutate(Type = "Case Names")
)

# Dodged bars: one bar per LegalOrg value within each tweet type.
ggplot(plot_legal, aes(fill = as.factor(LegalOrg), y = freq, x = as.factor(Type))) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() +
  ggtitle("Twitter") +
  scale_fill_grey(start = 0.2, end = .7) +
  xlab("Tweet Type") +
  ylab("Percent All Tweets") +
  labs(fill = "Legal Organization") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure3_twitter.pdf", height = 6, width = 8)


#Figure 5: differencedaysdecision_twitter
# Attach the case information from AmiciOrgsPairs.csv to the fuzzy-matched
# tweets (matching on docket, Twitter handle, Type and LegalOrg) and compute,
# for each tweet, the number of days between the tweet and three case dates.
# Differences are tweet date minus case date, so positive values mean the
# tweet was posted after the event.
# NOTE(review): `handler` (lower-cased handle) is created but the join below
# uses the original `handle` column - confirm which one was intended.
time <- twitter_fuzzy %>%
  dplyr::mutate(handler = tolower(handle)) %>%
  left_join(
    read.csv("./data/AmiciOrgsPairs.csv"),
    by = c(
      "docket.x" = "docket",
      "handle" = "twitter.handle",
      "Type" = "Type",
      "LegalOrg" = "LegalOrg"
    )
  ) %>%
  dplyr::mutate(
    `Post Created` = as.Date(created_at),
    `Cert Date` = as.numeric(
      as.Date(as.character(`Post Created`), format = "%Y-%m-%d") -
        as.Date(as.character(certdate), format = "%Y-%m-%d")
    ),
    `Argument Date` = as.numeric(
      as.Date(as.character(`Post Created`), format = "%Y-%m-%d") -
        as.Date(as.character(argued), format = "%Y-%m-%d")
    ),
    `Decided Date` = as.numeric(
      as.Date(as.character(`Post Created`), format = "%Y-%m-%d") -
        as.Date(as.character(opinion), format = "%Y-%m-%d")
    )
  )

# Long format: one row per tweet and reference date. `Difference From` is a
# factor whose levels follow the column order (Cert, Argument, Decided).
data_long <- time %>%
  gather(
    key = "Difference From",
    value = "difference",
    `Cert Date`:`Decided Date`,
    factor_key = TRUE
  )

# Density of the day differences for each reference date; the vertical line
# marks day 0 and the x axis is limited to +/- 500 days.
ggplot(data_long, aes(x = difference, color = `Difference From`, linetype = `Difference From`)) +
  geom_density(size = 1.2) +
  geom_vline(xintercept = 0) +
  ggtitle("Twitter") +
  theme_bw() +
  ylab("Density") +
  xlab("Difference (Days)") +
  scale_color_grey(start = .1, end = .8) +
  xlim(c(-500, 500)) +
  scale_linetype_manual(
    values = c("Cert Date" = "dashed", "Argument Date" = "twodash", "Decided Date" = "dotted")
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure5_twitter.pdf", height = 6, width = 8)

#Figure 6: differencedaysdecision_legalorg_twitter
# Same densities as Figure 5, faceted by whether the organization is a legal
# organization. Rows with missing LegalOrg are dropped; LegalOrg values other
# than 0/1 get a missing facet label. The y axis is limited to [0, 0.01] and
# the x axis to +/- 500 days.
ggplot(
  data_long %>%
    dplyr::filter(!is.na(LegalOrg)) %>%
    dplyr::mutate(
      Legal = dplyr::case_when(
        LegalOrg == 1 ~ "Legal Org",
        LegalOrg == 0 ~ "Not Legal Org",
        TRUE ~ NA_character_
      )
    ),
  aes(x = difference, color = `Difference From`, linetype = `Difference From`)
) +
  ggtitle("Twitter") +
  geom_density(size = 1.2) +
  theme_bw() +
  geom_vline(xintercept = 0) +
  ylab("Density") +
  xlab("Difference (Days)") +
  scale_color_grey(start = .1, end = .8) +
  facet_wrap(~Legal) +
  ylim(0, 0.01) +
  xlim(c(-500, 500)) +
  scale_linetype_manual(
    values = c("Cert Date" = "dashed", "Argument Date" = "twodash", "Decided Date" = "dotted")
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure6_twitter.pdf", height = 6, width = 8)

#Figure 7: differencedaysdecisionbygrouptype_twitter
# Same densities as Figure 5, with one panel per organization Type. Rows with
# an empty (or missing) Type are excluded; the x axis is limited to +/- 500
# days.
ggplot(
  data_long %>% dplyr::filter(Type != ""),
  aes(x = difference, color = `Difference From`, linetype = `Difference From`)
) +
  ggtitle("Twitter") +
  geom_density(size = 1.2) +
  geom_vline(xintercept = 0) +
  ylab("Density") +
  xlab("Difference (Days)") +
  scale_color_grey(start = .1, end = .8) +
  facet_wrap(~Type) +
  xlim(c(-500, 500)) +
  theme_bw() +
  scale_linetype_manual(
    values = c("Cert Date" = "dashed", "Argument Date" = "twodash", "Decided Date" = "dotted")
  ) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black")
  )
ggsave("./Figures/Figure7_twitter.pdf", height = 6, width = 8)
